In [1]:
%matplotlib inline
import sklearn
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import random
from sklearn.datasets import load_iris
from sklearn import model_selection
from sklearn import base
In [2]:
# NOTE: this iris-based setup is superseded by the make_classification cell below
iris = load_iris()
X = iris.data
y = iris.target
# fold the extra classes into -1 so the labels are usable by the binary classifiers below
y[(y == 2) | (y == 3)] = -1
In [2]:
ds = sklearn.datasets.make_classification(n_samples=10000,
n_features=30,
n_informative=15,
n_redundant=0,
n_repeated=0,
n_classes=2,
n_clusters_per_class=2,
weights=None,
flip_y=0.01,
class_sep=1.0,
hypercube=True,
shift=0.0,
scale=1.0,
shuffle=True,
random_state=None)
X, y = ds
In [3]:
# labels: [0,1] -> [-1,1]
y[y == 0] = -1
print(X[0])
print(y[0])
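As a quick reference point before hand-rolling the gradient descent below, here is a minimal sketch of an off-the-shelf L1-penalized linear baseline on the same X, y (it uses SGDClassifier's default hinge loss rather than the squared loss implemented below, and the alpha and max_iter values are arbitrary choices):
In [ ]:
# sketch: off-the-shelf L1-penalized linear baseline on the same data
from sklearn.linear_model import SGDClassifier
baseline = SGDClassifier(penalty='l1', alpha=0.0001, max_iter=1000)
sc = model_selection.cross_val_score(baseline, X, y, cv=5, scoring="accuracy")
print("Baseline cross validation scores: %s, mean: %.02f" % (sc, np.mean(sc)))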
In [4]:
import copy
In [ ]:
class GradientDescent(base.BaseEstimator):
    # first version (redefined in the next cell): batch gradient descent on the
    # squared loss with an L1 penalty, applied as a subgradient step
    def __init__(self, theta, lamb, eps):
        self.theta = theta
        self.eps = eps
        self.lamb = lamb
        self.tmp = self.theta
    def fit(self, X, y, nbIt=1000, printevery=-1):
        l = len(X)
        xTrans = X.transpose()
        for i in range(nbIt):
            loss = np.dot(X, self.theta) - y
            # squared loss + L1 penalty
            cost = np.sum(loss ** 2) / (2 * l) + self.lamb * np.sum(np.abs(self.theta))
            # subgradient of the penalized loss
            gradient = np.dot(xTrans, loss) + np.sign(self.theta) * self.lamb
            thetaprime = self.theta - self.eps * gradient
            # coefficients that would change sign are clipped to exactly 0 (sparsity)
            for k in range(len(self.theta)):
                self.theta[k] = 0 if thetaprime[k] * self.theta[k] < 0 else thetaprime[k]
            self.tmp = self.theta
            if printevery != -1 and i % printevery == 0:
                print("Iteration %s | Cost: %f | Score: %.03f" % (str(i).ljust(6), cost, self.score(X, y)))
                print("%d features used" % (self.nb_used_features()))
    def predict(self, x):
        ret = []
        for i in x:
            ret.append(1 if np.dot(i, self.theta) > 0 else -1)
        return ret
    def score(self, X, y):
        cpt = 0.0
        allpred = self.predict(X)
        for idx, p in enumerate(allpred):
            cpt += 1 if p == y[idx] else 0
        return cpt / len(X)
    def nb_used_features(self):
        # number of non-zero coefficients
        return int(np.count_nonzero(self.tmp))
In [13]:
class GradientDescent(base.BaseEstimator):
    # gradient descent with an L1 penalty: most iterations take a plain gradient
    # step on the squared loss; every (nbIt // 100)-th iteration applies only the
    # L1 shrinkage step, and coefficients that change sign are clipped to 0
    def __init__(self, theta, lamb, eps):
        self.theta = theta
        self.eps = eps
        self.lamb = lamb
    def fit(self, X, y, nbIt=1000, printevery=-1):
        l = len(X)
        xTrans = X.transpose()
        shrink_every = max(nbIt // 100, 1)
        for i in range(nbIt):
            loss = np.dot(X, self.theta) - y
            # squared loss + L1 penalty
            cost = np.sum(loss ** 2) / (2 * l) + self.lamb * np.sum(np.abs(self.theta))
            gradient = np.dot(xTrans, loss)
            if i % shrink_every == 0:
                # L1 shrinkage step
                thetaprime = self.theta - self.eps * (np.sign(self.theta) * self.lamb)
            else:
                # plain gradient step
                thetaprime = self.theta - self.eps * gradient
            # coefficients that would change sign are clipped to exactly 0 (sparsity)
            for k in range(len(self.theta)):
                self.theta[k] = 0 if thetaprime[k] * self.theta[k] < 0 else thetaprime[k]
            if printevery != -1 and i % printevery == 0:
                print("Iteration %s | Cost: %f | Score: %.03f" % (str(i).ljust(6), cost, self.score(X, y)))
                print("%d features used" % (self.nb_used_features()))
    def predict(self, x):
        ret = []
        for i in x:
            ret.append(1 if np.dot(i, self.theta) > 0 else -1)
        return ret
    def score(self, X, y):
        cpt = 0.0
        allpred = self.predict(X)
        for idx, p in enumerate(allpred):
            cpt += 1 if p == y[idx] else 0
        return cpt / len(X)
    def nb_used_features(self):
        # number of non-zero coefficients
        return int(np.count_nonzero(self.theta))
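The sign-flip clipping in fit() is one way of pushing coefficients to exactly zero; the more standard alternative is the soft-thresholding (proximal) update used by ISTA-style solvers. A minimal sketch of that update, assuming the same theta, gradient, eps and lamb as in the class above (soft_threshold_step is a hypothetical helper, not part of the class):
In [ ]:
# sketch: soft-thresholding (proximal) update for the L1 penalty (ISTA-style)
def soft_threshold_step(theta, gradient, eps, lamb):
    # plain gradient step on the squared loss ...
    z = theta - eps * gradient
    # ... followed by soft-thresholding, which zeroes out small coefficients exactly
    return np.sign(z) * np.maximum(np.abs(z) - eps * lamb, 0.0)
Such a step could replace both the if/else update and the sign-flip loop inside fit().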
In [14]:
#theta = np.zeros(len(X[0]))
theta = X[0].copy()  # copy: X[0] is a view, and fit() updates theta in place
lamb = 0.1
eps = 0.0001
gd = GradientDescent(theta, lamb, eps)
In [15]:
nbIterations = 2000
gd.fit(X, y, nbIterations, printevery=nbIterations // 10)
print(gd.theta)
print(gd.nb_used_features())
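The scores printed by fit() are computed on the same data the model was trained on, so they are optimistic. A minimal sketch of a held-out evaluation, reusing the GradientDescent class and the X, y arrays from above (the split ratio and random_state are arbitrary choices):
In [ ]:
# sketch: evaluate on a held-out test set instead of the training data
from sklearn.model_selection import train_test_split
X_tr, X_te, y_tr, y_te = train_test_split(X, y, test_size=0.3, random_state=0)
gd_holdout = GradientDescent(X_tr[0].copy(), lamb=0.1, eps=0.0001)
gd_holdout.fit(X_tr, y_tr, 2000)
print("Train score: %.03f | Test score: %.03f" % (gd_holdout.score(X_tr, y_tr), gd_holdout.score(X_te, y_te)))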
In [ ]:
gd.score(X,y)
In [ ]:
scoresGrad = model_selection.cross_val_score(gd, X, y, cv=5, scoring="accuracy")
print("Cross validation scores: %s, mean: %.02f"%(scoresGrad,np.mean(scoresGrad)))
In [ ]:
eps = 0.00001
la = []
cross_sc = []
used_features = []
for lamb in np.arange(0, 1.1, 0.1):
    theta = X[0].copy()
    gd = GradientDescent(theta, lamb, eps)
    nbIterations = 1000
    gd.fit(X, y, nbIterations)
    scoresSvm = model_selection.cross_val_score(gd, X, y, cv=5, scoring="accuracy")
    print("Lambda: %.02f | Cross val mean: %.02f | Features: %d" % (lamb, np.mean(scoresSvm), gd.nb_used_features()))
    cross_sc.append(np.mean(scoresSvm))
    la.append(lamb)
    used_features.append(gd.nb_used_features())
In [ ]:
# closer look at a single setting (lambda = 0.4) with more iterations
theta = X[0].copy()
lamb = 0.4
eps = 0.00001
gd = GradientDescent(theta, lamb, eps)
nbIterations = 2000
gd.fit(X, y, nbIterations, printevery=nbIterations // 10)
print(gd.nb_used_features())
In [ ]:
fig, ax1 = plt.subplots()
ax2 = ax1.twinx()
ax1.plot(la, cross_sc, '#6DC433')
ax2.plot(la, used_features, '#5AC8ED')
ax1.set_xlabel('lambda')
ax1.set_ylabel('Cross val score', color='#6DC433')
ax2.set_ylabel('Nb features used', color='#5AC8ED')
ax1.yaxis.grid(False)
ax2.grid(False)
plt.show()
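For comparison, the same sparsity-versus-accuracy trade-off can be traced with scikit-learn's built-in L1-penalized logistic regression. A minimal sketch (the grid of C values and the liblinear solver are arbitrary choices; C is an inverse regularization strength, so it is not directly comparable to lambda above):
In [ ]:
# sketch: sparsity vs. accuracy with an L1-penalized logistic regression
from sklearn.linear_model import LogisticRegression
for C in [0.001, 0.01, 0.1, 1.0]:
    clf = LogisticRegression(penalty='l1', solver='liblinear', C=C)
    sc = model_selection.cross_val_score(clf, X, y, cv=5, scoring="accuracy")
    clf.fit(X, y)
    print("C: %.03f | Cross val mean: %.02f | Features: %d" % (C, np.mean(sc), int(np.count_nonzero(clf.coef_))))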
In [ ]:
class GradientDescentL2(base.BaseEstimator):
    # gradient descent on the squared loss with an L2 (ridge) penalty
    def __init__(self, theta, lamb, eps):
        self.theta = theta
        self.eps = eps
        self.lamb = lamb
    def fit(self, X, y, nbIt=1000, printevery=-1):
        l = len(X)
        xTrans = X.transpose()
        for i in range(nbIt):
            loss = np.dot(X, self.theta) - y
            # squared loss + L2 penalty
            cost = np.sum(loss ** 2) / (2 * l) + self.lamb * (np.linalg.norm(self.theta) ** 2)
            # gradient of the penalized loss
            gradient = np.dot(xTrans, loss) + 2 * self.lamb * self.theta
            thetaprime = self.theta - self.eps * gradient
            for k in range(len(self.theta)):
                self.theta[k] = 0 if thetaprime[k] * self.theta[k] < 0 else thetaprime[k]
            if printevery != -1 and i % printevery == 0:
                print("Iteration %s | Cost: %f" % (str(i).ljust(6), cost))
    def predict(self, x):
        ret = []
        for i in x:
            ret.append(1 if np.dot(i, self.theta) > 0 else -1)
        return ret
    def score(self, X, y):
        cpt = 0.0
        allpred = self.predict(X)
        for idx, p in enumerate(allpred):
            cpt += 1 if p == y[idx] else 0
        return cpt / len(X)
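Since GradientDescentL2 fits a squared loss to the +/-1 labels and then takes the sign of the prediction, a natural reference point is scikit-learn's RidgeClassifier, which does the same thing in closed form. A minimal sketch for comparison (the alpha value is an arbitrary choice and is not on the same scale as lambda here):
In [ ]:
# sketch: closed-form ridge regression on +/-1 labels for comparison
from sklearn.linear_model import RidgeClassifier
ridge = RidgeClassifier(alpha=1.0)
sc = model_selection.cross_val_score(ridge, X, y, cv=5, scoring="accuracy")
print("RidgeClassifier cross validation scores: %s, mean: %.02f" % (sc, np.mean(sc)))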
In [ ]:
theta = np.zeros(len(X[0]))
lamb=0.05
eps=0.00001
gd = GradientDescentL2(theta,lamb,eps)
nbIterations = 20000
gd.fit(X, y, nbIterations, printevery=nbIterations // 10)
print("Score: %s"%gd.score(X,y))
scoresSvm = model_selection.cross_val_score(gd, X, y, cv=5, scoring="accuracy")
print("Cross validation scores: %s, mean: %.02f"%(scoresSvm,np.mean(scoresSvm)))
In [ ]:
eps = 0.00001
la = []
cross_sc = []
for lamb in np.arange(0, 12, 0.5):
    theta = np.zeros(len(X[0]))
    gd = GradientDescentL2(theta, lamb, eps)
    nbIterations = 5000
    gd.fit(X, y, nbIterations)
    scoresSvm = model_selection.cross_val_score(gd, X, y, cv=5, scoring="accuracy")
    print("Lambda: %.02f, Cross val mean: %.02f" % (lamb, np.mean(scoresSvm)))
    cross_sc.append(np.mean(scoresSvm))
    la.append(lamb)
In [ ]:
plt.plot(la,cross_sc)
plt.ylabel('Cross val score')
plt.xlabel('lambda')
plt.show()
In [ ]: